{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import pandas as pd\n",
    "#dir(datetime)\n",
    "from dateutil.parser import parse"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "datetime 数据类型说明\n",
    "\n",
    "date：以公历形式存储日历日期（年，月，日）\n",
    "time：将时间存储为时，分，秒，毫秒\n",
    "datetime：存储日期和时间\n",
    "timedelta：表示两个datetime值之间的差（日，秒，毫秒）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 字符串与datetime之间的转换\n",
    "datetime<--->字符串  ：datetime.strftime('%Y-%m-%d')\n",
    "字符串<--->datetime  :datetime.srtptime(value,'%Y-%m-%d')#注意'%Y-%m-%d'的格式，y小写则是年份的简写（如2015简写15）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2015-10-01 00:00:00\n",
      "2000-03-02\n"
     ]
    }
   ],
   "source": [
    "#字符串<--->datetime\n",
    "strtime='15-10-1'\n",
    "str2time=datetime.datetime.strptime(strtime,'%y-%m-%d')\n",
    "print str2time\n",
    "\n",
    "#datetime<--->字符串\n",
    "stamp=datetime.datetime(2000,3,2)\n",
    "stamp2str=stamp.strftime('%Y-%m-%d') #%Y-%d-%m顺序可以变\n",
    "print stamp2str"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "datetime.strptime是已知格式进行格式转换的最佳方法，但不方便的地方在于每次都需要明确说明格式（'%y-%m-%d'）\n",
    "该种情况下可以使用dateutil中的parser.parse方法，dateutil几乎可以解析能够理解的所有格式\n",
    "！！！！！但注意：dateutil.parser并不是一个好用但并不完美的解析工具，它会把一些不是日期的字符串解析为日期"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2010-01-09 00:00:00\n",
      "2001-01-21 23:23:07\n",
      "1999-02-01 00:00:00\n",
      "1999-12-13 00:00:00\n"
     ]
    }
   ],
   "source": [
    "from dateutil.parser import parse\n",
    "print parse('2010-1-9')\n",
    "print parse('Jan,21,2001,11:23:7 pm')\n",
    "print parse('1/2/1999',dayfirst=True)#在国际惯例里，喜欢把日期写在前面，如果不添加dayfirst=True，则输出的是1999-1-2\n",
    "print parse('13/12/1999')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.tseries.index.DatetimeIndex'>\n",
      "[2011-01-03, 2012-02-03]\n",
      "Length: 2, Freq: None, Timezone: None\n",
      "<class 'pandas.tseries.index.DatetimeIndex'>\n",
      "<class 'pandas.tslib.Timestamp'>\n"
     ]
    }
   ],
   "source": [
    "datestr=['2011-1-3','2012/2/3']\n",
    "pddate=pd.to_datetime(datestr)\n",
    "print pddate\n",
    "print type(pddate)\n",
    "print type(pddate[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<class 'pandas.tseries.index.DatetimeIndex'>\n",
       "[2015-01-01, ..., 2015-01-31]\n",
       "Length: 3, Freq: None, Timezone: None"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates=[datetime.datetime(2015,1,1),datetime.datetime(2015,1,15),datetime.datetime(2015,1,31)]\n",
    "pd.DatetimeIndex(dates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "periodindex1=pd.period_range('2015-1-1','2015-1-31',freq='D')\n",
    "#按照不同的需要产生时间序列，比如按月份连续pd.period_range('2015-1','2015-1',freq='M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2015-01-01    0\n",
      "2015-01-02    1\n",
      "2015-01-03    2\n",
      "2015-01-04    3\n",
      "2015-01-05    4\n",
      "Freq: D, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "data2=[x for x in xrange(31)]\n",
    "DataSeries2=pd.Series(data2,index=periodindex1)\n",
    "print DataSeries2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "'''\n",
    "时间精度的匹配问题\n",
    "如2015-1与2015-1-1的匹配\n",
    "'''\n",
    "period_in_month=pd.period_range('2015-1','2015-3',freq=\"M\")\n",
    "period_in_day=pd.period_range('2015-1-1','2015-3-31',freq=\"D\")\n",
    "period_in_month.asfreq(\"D\",how='start')\n",
    "period_in_month=period_in_month.asfreq(\"D\",how='start')\n",
    "full_ts=pd.Series(period_in_month,index=period_in_month)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2015-01-01    2015-01-01\n",
      "2015-02-01    2015-02-01\n",
      "2015-03-01    2015-03-01\n",
      "Freq: D, dtype: object\n"
     ]
    }
   ],
   "source": [
    "print full_ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2015-01-01    2015-01-01\n",
       "2015-01-02           NaN\n",
       "2015-01-03           NaN\n",
       "2015-01-04           NaN\n",
       "2015-01-05           NaN\n",
       "Freq: D, dtype: object"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_ts.reindex(period_in_day).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2015-01-01    2015-01-01\n",
       "2015-01-02    2015-01-01\n",
       "2015-01-03    2015-01-01\n",
       "2015-01-04    2015-01-01\n",
       "2015-01-05    2015-01-01\n",
       "Freq: D, dtype: object"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_ts.reindex(period_in_day,method='ffill').head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "####该部分作为引入，说明，从csv与xls读取数据，关于时间，属性是不一致的########"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n",
      "<class 'pandas.tslib.Timestamp'>\n",
      "<class 'pandas.core.series.Series'>\n",
      "<type 'str'>\n",
      "<class 'pandas.core.series.Series'>\n",
      "<type 'str'>\n"
     ]
    }
   ],
   "source": [
    "csvdata=pd.read_csv('C:\\\\pythoncoding\\\\data\\EBR_data\\\\EBR-from2008.csv')\n",
    "xlsdata=pd.read_excel('c:\\\\pythonCoding\\\\data\\\\treasury data\\\\10.xls',header=None)\n",
    "xlsdata.columns=['Date','yield']\n",
    "print type(xlsdata['Date'])\n",
    "print type(xlsdata['Date'][0])\n",
    "print type(csvdata['Date'])\n",
    "print type(csvdata['Date'][0])\n",
    "\n",
    "csvdata.set_index('Date')\n",
    "print type(csvdata['Date'])\n",
    "print type(csvdata['Date'][0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "########以下部分为pandas time series，时间序列部分##########"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "      <th>Unnamed: 7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 2015/4/15</td>\n",
       "      <td> 1.97</td>\n",
       "      <td> 2.05</td>\n",
       "      <td> 1.96</td>\n",
       "      <td> 2.02</td>\n",
       "      <td>  594000</td>\n",
       "      <td> 2.02</td>\n",
       "      <td> NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td> 2015/4/14</td>\n",
       "      <td> 1.95</td>\n",
       "      <td> 1.98</td>\n",
       "      <td> 1.93</td>\n",
       "      <td> 1.96</td>\n",
       "      <td> 1670900</td>\n",
       "      <td> 1.96</td>\n",
       "      <td> NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td> 2015/4/13</td>\n",
       "      <td> 1.94</td>\n",
       "      <td> 1.97</td>\n",
       "      <td> 1.92</td>\n",
       "      <td> 1.92</td>\n",
       "      <td>  841800</td>\n",
       "      <td> 1.92</td>\n",
       "      <td> NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td> 2015/4/10</td>\n",
       "      <td> 1.94</td>\n",
       "      <td> 1.96</td>\n",
       "      <td> 1.93</td>\n",
       "      <td> 1.94</td>\n",
       "      <td>  547800</td>\n",
       "      <td> 1.94</td>\n",
       "      <td> NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>  2015/4/9</td>\n",
       "      <td> 1.95</td>\n",
       "      <td> 1.98</td>\n",
       "      <td> 1.93</td>\n",
       "      <td> 1.94</td>\n",
       "      <td> 1523800</td>\n",
       "      <td> 1.94</td>\n",
       "      <td> NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Date  Open  High   Low  Close   Volume  Adj Close Unnamed: 7\n",
       "0  2015/4/15  1.97  2.05  1.96   2.02   594000       2.02        NaN\n",
       "1  2015/4/14  1.95  1.98  1.93   1.96  1670900       1.96        NaN\n",
       "2  2015/4/13  1.94  1.97  1.92   1.92   841800       1.92        NaN\n",
       "3  2015/4/10  1.94  1.96  1.93   1.94   547800       1.94        NaN\n",
       "4   2015/4/9  1.95  1.98  1.93   1.94  1523800       1.94        NaN"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "EBRdata=pd.read_csv('C:\\\\pythoncoding\\\\data\\EBR_data\\\\EBR-from2008.csv')\n",
    "EBRdata.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n",
      "<class 'pandas.tslib.Timestamp'>\n"
     ]
    }
   ],
   "source": [
    "EBRdata['Date']=pd.to_datetime(EBRdata['Date']) #把Date转换为Timestam属性，通过set_index（）是不会转换成时间戳Timestam类型\n",
    "print type(EBRdata['Date'])\n",
    "print type(EBRdata['Date'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#对于时间序列而言，一般默认为是没有规律的，这里通过resample，重新处理\n",
    "#set_index设置索引，resample按“天\"作为频率，dropna去掉数据中的空值\n",
    "#关于resample的用法见 http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.resample.html\n",
    "EBRopen=EBRdata.set_index('Date').resample('D')['Open'].dropna()\n",
    "EBRall=EBRdata.set_index('Date').resample('D')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#pd的DateOffset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2014-12-25 00:00:00\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "datetime.datetime"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xmasDay=pd.datetime(2014,12,25)\n",
    "print xmasDay\n",
    "type(xmasDay)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2015-11-19 15:41:40.879000')"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nextday=xmasDay+pd.DateOffset(days=2)\n",
    "today=pd.datetime.now()\n",
    "today+pd.DateOffset(weeks=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#Note that datetime.datetime is different from pd.Timestamp. The former is a Python class and is inefficient, while the latter is based on the numpy.datetime64 datatype. The pd.DateOffset object works with pd.Timestamp and adding it to a datetime.datetime function casts that object into a pd.Timestamp object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2019-06-12 15:41:40.879000')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "today+2*pd.DateOffset(years=2,month=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2015-03-01 00:00:00\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Timestamp('2014-12-01 00:00:00')"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pandas.tseries.offsets import QuarterBegin\n",
    "lastDay=pd.datetime(2014,12,1)\n",
    "print lastDay+QuarterBegin()\n",
    "dtoffset=QuarterBegin()\n",
    "dtoffset.rollback(lastDay)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "datetime.date(2019, 11, 30)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = ['2015/10/30', '2019/11/30', '2015/12/31', '2015/5/29', '2015/6/30']\n",
    "b = [parse(d).date() for d in a]\n",
    "max(b)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
